THIS IS MY NEW TEST Load packages + make it so table always displays NA’s
suppressMessages(silent <- lapply(
c("plyr", "dplyr", "tidyverse", "data.table", "vroom", "knitr"),
library, character.only=T))
table = function (..., useNA = 'always') base::table(..., useNA = useNA)Load UK Biobank dataset
bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukb34137.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukb34137_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)## [1] 502527 5172
bd=vroom("/Users/mike/Documents/R_files/UKBpheno/pheno/ukbXXXXX.tab", delim="\t", show_col_types = FALSE)
source("src/components/ukbXXXXX_factordata.R") #file provided by UKB "ukbxxxxx_loaddata.R" without the loading part, to label the responses in survey questions
bd=as_tibble(bd)
dim(bd)Load in second UK Biobank dataset
#read.lines("src/components/load_UKBphenotables.r") #how I made this file
bd_join4<-vroom("bd_join4.tab", delim="\t", show_col_types = FALSE)## Warning: One or more parsing issues, see `problems()` for details
bd_join4=as_tibble(bd_join4)
dim(bd_join4)## [1] 501560 2584
Read in extra files
source("src/components/manyColsToDummy.R")
hormonemeds<-read.csv("src/components/sexHormoneMeds.csv")
source("src/components/SSRI_Meds.R")
codes<-read.table("src/components/phenocodes.txt", header=F)
QCids<-read.table("src/components/bd_QC-keep.txt",header=TRUE)
withdrawn<-read.csv("src/components/w48818_20210809.csv", header = FALSE)
as_tibble(hormonemeds);as_tibble(ssricodes);as_tibble(codes);as_tibble(QCids);as_tibble(withdrawn)## # A tibble: 160 × 2
## code name
## <int> <chr>
## 1 1140869270 "\"medroxyprogesterone\""
## 2 1140910674 "\"ethinylnortestosterone\""
## 3 1140857656 "\"methyltestosterone product\""
## 4 1140865136 "\"yohimbine/pemoline/methyltestosterone\""
## 5 1140868532 "\"testosterone product\""
## 6 1140857690 "\"oestradiol 25mg implant 36 week\""
## 7 1140857700 "\"oestradiol 1mg/1ml injection\""
## 8 1141165318 "\"cetrorelix\""
## 9 1140917306 "\"bicalutamide\""
## 10 1140917310 "\"casodex 50mg tablet\""
## # … with 150 more rows
## # A tibble: 9 × 1
## value
## <dbl>
## 1 1140921600
## 2 1141180212
## 3 1140879540
## 4 1140867888
## 5 1140867878
## 6 1140867876
## 7 1140867884
## 8 1140867888
## 9 1140867860
## # A tibble: 30 × 2
## V1 V2
## <int> <chr>
## 1 30620 Alanine_aminotransferase
## 2 30600 Albumin
## 3 30610 Alkaline_phosphatase
## 4 30630 Apolipoprotein_A
## 5 30640 Apolipoprotein_B
## 6 30650 Aspartate_aminotransferase
## 7 30710 C-reactive_protein
## 8 30680 Calcium
## 9 30690 Cholesterol
## 10 30700 Creatinine
## # … with 20 more rows
## # A tibble: 356,980 × 1
## IID
## <int>
## 1 1000013
## 2 1000036
## 3 1000048
## 4 1000055
## 5 1000067
## 6 1000072
## 7 1000080
## 8 1000099
## 9 1000106
## 10 1000123
## # … with 356,970 more rows
## # A tibble: 68 × 1
## V1
## <int>
## 1 1044796
## 2 1046204
## 3 1046731
## 4 1154359
## 5 1305102
## 6 1394987
## 7 1410887
## 8 1425582
## 9 1525271
## 10 1733666
## # … with 58 more rows
Extract columns from bd dataframe
pheno<-bd%>%select(f.eid,f.21003.0.0, f.31.0.0,
f.189.0.0,
f.54.0.0, f.22000.0.0,
f.1558.0.0,
paste("f.", codes$V1, ".0.0", sep=""))
colnames(pheno)<-c("IID", "Age", "Sex",
"Townsend",
"Assessment_center", "Geno_batch",
"AlcoholFreq",
codes$V2)
pheno<-as_tibble(pheno)EOF